import pandas as pd
import plotly.express as px
# Load the fast-charger (DC) station export.
chargingstation_dc = pd.read_csv(
    filepath_or_buffer='C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/alt_fuel_stations_dc.csv'
)

# Density map of the fast chargers.
fig = px.density_mapbox(
    data_frame=chargingstation_dc,
    lat='Latitude',
    lon='Longitude',
    radius=1,
    zoom=1.5,
    mapbox_style="stamen-terrain",
)
fig.show()

# Geographic scatter map of the fast chargers: coloured by connector type,
# sized by the fast-charger count column, with the city as hover label.
fig = px.scatter_geo(
    data_frame=chargingstation_dc,
    lat='Latitude',
    lon='Longitude',
    hover_name="City",
    color="EVConnectorTypes",
    size="EV.DC.Fast.Count",
)
fig.show()

# Drop the rows whose connector type is exactly "TESLA"; everything below
# works on the remaining stations.
chargingstation_dc = chargingstation_dc.loc[chargingstation_dc["EVConnectorTypes"].ne("TESLA")]
# Bare expression: displays the frame in a notebook, no effect in a script.
chargingstation_dc
# Load the level 2 + DC station export (the file name indicates it was
# already filtered before being saved).
chargingstation__lv2dc_filtered = pd.read_csv(
    filepath_or_buffer='C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/alt_fuel_stations_lv2dc.filtered.csv'
)

# Density map of the level 2 and DC chargers.
fig = px.density_mapbox(
    data_frame=chargingstation__lv2dc_filtered,
    lat='Latitude',
    lon='Longitude',
    radius=1,
    zoom=1.5,
    mapbox_style="stamen-terrain",
)
fig.show()

# Geographic scatter map of the level 2 and fast chargers; marker size is the
# combined total of both charger types.
fig = px.scatter_geo(
    data_frame=chargingstation__lv2dc_filtered,
    lat='Latitude',
    lon='Longitude',
    hover_name="City",
    color="EV.Connector.Types",
    size="Level2.DC.Total",
)
fig.show()
# Split the station frames by state. Stations in the states around Wisconsin
# are kept to account for travel locations on the border.

# Minnesota
chargingstation_dc_mn = chargingstation_dc[chargingstation_dc.State == "MN"]
chargingstation__lv2dc_filtered_mn = chargingstation__lv2dc_filtered[chargingstation__lv2dc_filtered.State == "MN"]
# Iowa
chargingstation_dc_ia = chargingstation_dc[chargingstation_dc.State == "IA"]
chargingstation__lv2dc_filtered_ia = chargingstation__lv2dc_filtered[chargingstation__lv2dc_filtered.State == "IA"]
# Illinois
chargingstation_dc_il = chargingstation_dc[chargingstation_dc.State == "IL"]
chargingstation__lv2dc_filtered_il = chargingstation__lv2dc_filtered[chargingstation__lv2dc_filtered.State == "IL"]
# Michigan
chargingstation_dc_mi = chargingstation_dc[chargingstation_dc.State == "MI"]
chargingstation__lv2dc_filtered_mi = chargingstation__lv2dc_filtered[chargingstation__lv2dc_filtered.State == "MI"]
# Wisconsin. This rebinds the main frames to the WI rows only, so it must run
# after the other per-state subsets have been taken.
chargingstation_dc = chargingstation_dc[chargingstation_dc.State == "WI"]
chargingstation__lv2dc_filtered = chargingstation__lv2dc_filtered[chargingstation__lv2dc_filtered.State == "WI"]

# Combine the fast charging stations for all five states.
# DataFrame.append was removed in pandas 2.0; a single pd.concat gives the
# same row order (WI, MN, IA, IL, MI) and keeps the original index labels.
chargingstation_dc = pd.concat(
    [
        chargingstation_dc,
        chargingstation_dc_mn,
        chargingstation_dc_ia,
        chargingstation_dc_il,
        chargingstation_dc_mi,
    ]
)
# Bare expression: displays the frame in a notebook, no effect in a script.
chargingstation_dc

# Combine the level 2 & fast charging stations for all five states.
# Bare expression: shows the Wisconsin-only frame in a notebook before the
# other states are added.
chargingstation__lv2dc_filtered
chargingstation__lv2dc_filtered = pd.concat(
    [
        chargingstation__lv2dc_filtered,
        chargingstation__lv2dc_filtered_mn,
        chargingstation__lv2dc_filtered_ia,
        chargingstation__lv2dc_filtered_il,
        chargingstation__lv2dc_filtered_mi,
    ]
)
import json
# Load the hotel export for Wisconsin; the records sit under the
# top-level "elements" key.
with open('C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/osm_hotel.json', encoding="utf8") as f:
    hotelfile = json.loads(f.read())
hotel = hotelfile['elements']

# Load the restaurant export for Wisconsin the same way.
with open('C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/osm_restaurant.json', encoding="utf8") as f:
    restaurantfile = json.loads(f.read())
restaurant = restaurantfile['elements']

# Hotels followed by restaurants, as one combined list of records.
tourism = [*hotel, *restaurant]
# Density maps, shown in this order: hotels and restaurants combined,
# hotels only, restaurants only. `fig` ends up bound to the last figure.
for poi_records in (tourism, hotel, restaurant):
    fig = px.density_mapbox(
        data_frame=poi_records,
        lat='lat',
        lon='lon',
        radius=1,
        zoom=1.5,
        mapbox_style="stamen-terrain",
    )
    fig.show()

# Convert the tourism, hotel and restaurant record lists to data frames.
tourism, hotel, restaurant = map(pd.DataFrame, (tourism, hotel, restaurant))
# Combine station name, street address, city and state into one label that is
# used later as the station's identifier (and as a distance column name).
chargingstation_dc["StationNameCityState"] = (
    chargingstation_dc["Station.Name"]
    + " " + chargingstation_dc["Street.Address"]
    + " " + chargingstation_dc["City"]
    + " " + chargingstation_dc["State"]
)
chargingstation__lv2dc_filtered["StationNameCityState"] = (
    chargingstation__lv2dc_filtered["Station.Name"]
    + " " + chargingstation__lv2dc_filtered["Street.Address"]
    + " " + chargingstation__lv2dc_filtered["City"]
    + " " + chargingstation__lv2dc_filtered["State"]
)

# Bundle label, latitude and longitude per station; these lists are turned
# into the station location frames used for the haversine distances.
chargingstation_dc_loc = list(zip(
    chargingstation_dc['StationNameCityState'],
    chargingstation_dc.Latitude,
    chargingstation_dc.Longitude,
))
chargingstation__lv2dc_filtered_loc = list(zip(
    chargingstation__lv2dc_filtered['StationNameCityState'],
    chargingstation__lv2dc_filtered.Latitude,
    chargingstation__lv2dc_filtered.Longitude,
))

# (lat, lon) pairs for every hotel / restaurant record.
tourism_location = list(zip(tourism.lat, tourism.lon))
hotel_location = list(zip(hotel.lat, hotel.lon))
restaurant_location = list(zip(restaurant.lat, restaurant.lon))

# Remove pairs with a missing coordinate. pd.isna tests the values directly
# instead of comparing the tuple's text form with '(nan, nan)'; a pair with
# only one coordinate missing is dropped as well, since it is not a usable
# location either.
tourism_loc = [x for x in tourism_location if not (pd.isna(x[0]) or pd.isna(x[1]))]
hotel_loc = [x for x in hotel_location if not (pd.isna(x[0]) or pd.isna(x[1]))]
restaurant_loc = [x for x in restaurant_location if not (pd.isna(x[0]) or pd.isna(x[1]))]
from geopy.geocoders import Nominatim
# Module-level Nominatim client; findzipcode calls its reverse() method to
# look up the address for a "lat,lon" string.
geolocator = Nominatim(user_agent="geoapiExercises")
# defining a function to find zip code for each latitude and longitude
def findzipcode(data, geocoder=None):
    """Reverse-geocode locations and collect the postcode found for each.

    One ``reverse`` call is made per location, sequentially and without a
    timeout, so large inputs take a long time.

    Args:
        data: Iterable of indexable items whose first two entries are the
            latitude and the longitude.
        geocoder: Optional object exposing ``reverse(query, timeout=...)``.
            Defaults to the module-level ``geolocator``.

    Returns:
        A list of ``(latitude, longitude, zipcode)`` tuples, where latitude
        and longitude are the ``str()`` forms of the inputs. Locations with
        no geocoding result, no address, or no postcode are left out.
    """
    if geocoder is None:
        geocoder = geolocator
    data_zipcode = []
    for point in data:
        latitude = str(point[0])
        longitude = str(point[1])
        location = geocoder.reverse(latitude + "," + longitude, timeout=None)
        if location is None:
            continue
        # A result may lack the 'address' entry; treat that like a missing
        # postcode instead of raising KeyError.
        zipcode = location.raw.get('address', {}).get('postcode')
        if zipcode is not None:
            data_zipcode.append((latitude, longitude, zipcode))
    return data_zipcode
# First run only: uncomment the steps below to build the zip-code lookups.
# This takes around 14 hours and should be done differently to improve
# performance as a next step.
#
# Find the zip code for each tourism data set:
#hotel_zipcode = findzipcode(hotel_loc)
#restaurant_zipcode = findzipcode(restaurant_loc)
#tourism_zipcode = findzipcode(tourism_loc)
#
# Write the results to file so later runs can skip findzipcode. findzipcode
# returns a plain list, so wrap it in a DataFrame before calling to_csv:
#pd.DataFrame(hotel_zipcode, columns=['lat','lon','zipcode']).to_csv("C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/hotel.csv")
#pd.DataFrame(restaurant_zipcode, columns=['lat','lon','zipcode']).to_csv("C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/restaurant.csv")
#pd.DataFrame(tourism_zipcode, columns=['lat','lon','zipcode']).to_csv("C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/tourism.csv")

# Every later run: read the saved files instead of running findzipcode.
hotel_zipcode = pd.read_csv(
    filepath_or_buffer='C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/hotel.csv'
)
restaurant_zipcode = pd.read_csv(
    filepath_or_buffer='C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/restaurant.csv'
)
tourism_zipcode = pd.read_csv(
    filepath_or_buffer='C:/Users/sbace/Documents/DataScience/DS785_Capstone/DataSources/Good/tourism.csv'
)
# Keep the lat/lon/zipcode columns and count the rows per zip code.
hotel_zipcode = pd.DataFrame(hotel_zipcode, columns=['lat', 'lon', 'zipcode'])
hotel_count = hotel_zipcode.groupby('zipcode').count()
restaurant_zipcode = pd.DataFrame(restaurant_zipcode, columns=['lat', 'lon', 'zipcode'])
restaurant_count = restaurant_zipcode.groupby('zipcode').count()
tourism_zipcode = pd.DataFrame(tourism_zipcode, columns=['lat', 'lon', 'zipcode'])
tourism_count = tourism_zipcode.groupby('zipcode').count()

# Keep only the zip codes whose count is above the median count.
# The comparison must be made on one count column: indexing a frame with a
# whole-frame boolean mask only blanks the non-matching cells and keeps every
# row, so the index would still list all zip codes.
hotel_zipcode_index = hotel_count.index[hotel_count.lat > hotel_count.lat.median()].tolist()
restaurant_zipcode_index = restaurant_count.index[restaurant_count.lat > restaurant_count.lat.median()].tolist()
tourism_zipcode_index = tourism_count.index[tourism_count.lat > tourism_count.lat.median()].tolist()

# Bare expressions: show the medians and row counts in a notebook, no effect
# in a script.
hotel_count.lat.median(), restaurant_count.lat.median(), tourism_count.lat.median()
len(hotel_zipcode), len(restaurant_zipcode), len(tourism_zipcode)
#function to average latitude & longitude to find one location per zipcode.
#As a next step, a more accurate central location would be to use gis centroid functionality from census data.
def findlatlon(index, data):
    """Average the coordinates of all rows sharing a zip code.

    Args:
        index: Iterable of zip codes to summarise.
        data: DataFrame with ``zipcode``, ``lat`` and ``lon`` columns; lat and
            lon values must be convertible with ``float()``. The frame is not
            modified.

    Returns:
        A DataFrame with one row per zip code, in the order given by
        ``index``, and the columns ``zipcode``, ``lat`` (mean latitude),
        ``lon`` (mean longitude) and ``latlon`` (a ``(lat, lon)`` tuple).
        Zip codes with no matching rows in ``data`` are skipped.
    """
    centroids = []
    for zipcode in index:
        matches = data[data.zipcode == zipcode]
        # Convert into local lists; assigning converted values back onto the
        # filtered slice would be a chained assignment on a copy of `data`.
        lats = [float(value) for value in matches.lat]
        lons = [float(value) for value in matches.lon]
        if not lats:
            # Nothing to average for this zip code (would divide by zero).
            continue
        centroids.append((zipcode, sum(lats) / len(lats), sum(lons) / len(lons)))
    data_zipcode = pd.DataFrame(centroids, columns=['zipcode', 'lat', 'lon'])
    data_zipcode['latlon'] = list(zip(data_zipcode.lat, data_zipcode.lon))
    return data_zipcode
# Reduce each data set to a single averaged location per selected zip code.
hotel_zipcode_latlon = findlatlon(index=hotel_zipcode_index, data=hotel_zipcode)
restaurant_zipcode_latlon = findlatlon(index=restaurant_zipcode_index, data=restaurant_zipcode)
tourism_zipcode_latlon = findlatlon(index=tourism_zipcode_index, data=tourism_zipcode)
import haversine as hs
import folium
#defining a function to calculate distance between two locations
def distance_from(loc1, loc2):
    """Return the haversine distance between two locations, rounded to 2 decimals.

    Args:
        loc1: First location, passed unchanged to ``hs.haversine``. Callers in
            this file pass ``(lat, lon)`` tuples, with the charging station
            first.
        loc2: Second location, same form; callers pass the hotel, restaurant
            or tourism location here.

    Returns:
        The value of ``hs.haversine(loc1, loc2)`` rounded to two decimals.
        No unit argument is given, so the library default unit applies.
    """
    return round(hs.haversine(loc1, loc2), 2)
# Convert the charging station location lists to data frames and add a
# (lat, lon) tuple column in the form distance_from expects.
chargingstation__lv2dc_filtered_loc = pd.DataFrame(chargingstation__lv2dc_filtered_loc, columns=['stationNameCityState', 'lat', 'lon'])
chargingstation__lv2dc_filtered_loc['latlon'] = list(zip(chargingstation__lv2dc_filtered_loc.lat, chargingstation__lv2dc_filtered_loc.lon))
chargingstation_dc_loc = pd.DataFrame(chargingstation_dc_loc, columns=['stationNameCityState', 'lat', 'lon'])
chargingstation_dc_loc['latlon'] = list(zip(chargingstation_dc_loc.lat, chargingstation_dc_loc.lon))


def _add_station_distances(poi, stations):
    """Return *poi* with one distance column appended per charging station.

    Args:
        poi: DataFrame with a ``latlon`` column of ``(lat, lon)`` tuples.
        stations: DataFrame with ``stationNameCityState`` and ``latlon``
            columns.

    Returns:
        A new DataFrame: the columns of ``poi`` followed by one column per
        distinct station label (in first-seen order) holding
        ``distance_from(station, location)`` for every row. When a label
        occurs more than once, the last station with that label wins.
    """
    distance_columns = {}
    for label, station_latlon in zip(stations['stationNameCityState'], stations['latlon']):
        distance_columns[label] = poi['latlon'].apply(
            lambda poi_latlon, origin=station_latlon: distance_from(origin, poi_latlon)
        )
    # Attach all columns in one concat; inserting them one at a time
    # fragments the frame and gets slow with many stations.
    return pd.concat([poi, pd.DataFrame(distance_columns, index=poi.index)], axis=1)


# Distance from each hotel location to each level 2 and fast charging station.
hotel_zipcode_latlon = _add_station_distances(hotel_zipcode_latlon, chargingstation__lv2dc_filtered_loc)
# Distance from each restaurant location to each fast charging station.
restaurant_zipcode_latlon = _add_station_distances(restaurant_zipcode_latlon, chargingstation_dc_loc)
# Distance from each hotel-and-restaurant location to each fast charging station.
tourism_zipcode_latlon = _add_station_distances(tourism_zipcode_latlon, chargingstation_dc_loc)
def _attach_nearest_station(poi, stations):
    """Add nearest-station columns to *poi* in place.

    Every column from position 4 onwards (after zipcode, lat, lon, latlon) is
    treated as a per-station distance column named after the station.

    Args:
        poi: DataFrame whose columns from position 4 on are station distances.
        stations: DataFrame with ``stationNameCityState``, ``lat`` and ``lon``
            columns.

    Adds the columns ``dist_min`` (smallest distance in the row), ``dist_to``
    (name of the column holding it), ``dist_lat`` and ``dist_lon`` (location
    of that station; the first station is used when a label is duplicated,
    and the value is NaN when the label is not found in ``stations``).
    """
    # Snapshot the distance columns before adding any result column, so the
    # min / idxmin below never see dist_min itself.
    distances = poi.iloc[:, 4:]
    nearest_distance = distances.min(axis=1)
    nearest_label = distances.idxmin(axis=1)
    poi['dist_min'] = nearest_distance
    poi['dist_to'] = nearest_label

    # One lookup row per station label instead of a filtered search per row.
    station_lookup = stations.drop_duplicates(subset='stationNameCityState').set_index('stationNameCityState')
    poi['dist_lat'] = poi['dist_to'].map(station_lookup['lat'])
    poi['dist_lon'] = poi['dist_to'].map(station_lookup['lon'])


# Select the minimum distance per row and find the nearest charging station's
# name, latitude and longitude for each hotel and/or restaurant location.
_attach_nearest_station(hotel_zipcode_latlon, chargingstation__lv2dc_filtered_loc)
_attach_nearest_station(restaurant_zipcode_latlon, chargingstation_dc_loc)
_attach_nearest_station(tourism_zipcode_latlon, chargingstation_dc_loc)
# Bare expression: displays the frame in a notebook, no effect in a script.
hotel_zipcode_latlon
# Minimum, median and maximum published ranges (std*), and the same ranges
# reduced for medium (med*) and high (high*) impacts.
# NOTE(review): no unit is stated for these values, and dist_min comes from
# hs.haversine called without a unit argument - confirm both use the same unit.
stdmin = 170
stdmed = 245
stdmax = 361
medmin = 127.5
medmed = 183.8
medmax = 270.8
highmin = 85
highmed = 122.5
highmax = 180.5

# Flag column -> range threshold, in the order the columns are added:
# standard ranges, medium-impact ranges, high-impact ranges (min/median/max).
_range_thresholds = {
    'std_min': stdmin,
    'std_med': stdmed,
    'std_max': stdmax,
    'med_min': medmin,
    'med_med': medmed,
    'med_max': medmax,
    'high_min': highmin,
    'high_med': highmed,
    'high_max': highmax,
}

# A flag is True when the nearest charging station is at least as far away as
# the corresponding range.
for _poi_frame in (hotel_zipcode_latlon, restaurant_zipcode_latlon, tourism_zipcode_latlon):
    for _flag, _threshold in _range_thresholds.items():
        _poi_frame[_flag] = _poi_frame['dist_min'].ge(_threshold)
# Bare expressions: display the three frames in a notebook, no effect in a
# script.
hotel_zipcode_latlon
restaurant_zipcode_latlon
tourism_zipcode_latlon

# Hotel locations flagged for each range threshold.
hotel_std_min = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['std_min']]
hotel_std_med = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['std_med']]
hotel_std_max = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['std_max']]
hotel_med_min = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['med_min']]
hotel_med_med = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['med_med']]
hotel_med_max = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['med_max']]
hotel_high_min = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['high_min']]
hotel_high_med = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['high_med']]
hotel_high_max = hotel_zipcode_latlon.loc[hotel_zipcode_latlon['high_max']]

# Restaurant locations flagged for each range threshold.
restaurant_std_min = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['std_min']]
restaurant_std_med = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['std_med']]
restaurant_std_max = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['std_max']]
restaurant_med_min = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['med_min']]
restaurant_med_med = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['med_med']]
restaurant_med_max = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['med_max']]
restaurant_high_min = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['high_min']]
restaurant_high_med = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['high_med']]
restaurant_high_max = restaurant_zipcode_latlon.loc[restaurant_zipcode_latlon['high_max']]

# Combined hotel-and-restaurant locations flagged for each range threshold.
tourism_std_min = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['std_min']]
tourism_std_med = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['std_med']]
tourism_std_max = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['std_max']]
tourism_med_min = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['med_min']]
tourism_med_med = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['med_med']]
tourism_med_max = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['med_max']]
tourism_high_min = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['high_min']]
tourism_high_med = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['high_med']]
tourism_high_max = tourism_zipcode_latlon.loc[tourism_zipcode_latlon['high_max']]
#map creation of charging station locations along with hotels and/or restaurants outside the range of the nearest charging station
def create_map(poi, stations):
    """Build a folium map of points of interest and charging stations.

    Args:
        poi: DataFrame with ``lat``, ``lon``, ``zipcode`` and the nine boolean
            range-flag columns (``std_*``, ``med_*``, ``high_*``).
        stations: DataFrame with ``lat``, ``lon`` and ``stationNameCityState``
            columns.

    Returns:
        A ``folium.Map`` centred on the mean poi latitude/longitude that
        contains one circle marker per poi row (popup: zip code), then one
        marker per station, then one circle per station (popup: station
        label).
    """
    # Flag columns in priority order with their marker colour: the first flag
    # that is set for a row decides the colour.
    flag_colours = (
        ('std_max', 'brown'),
        ('med_max', 'black'),
        ('std_med', 'red'),
        ('med_med', 'yellow'),
        ('high_max', 'blue'),
        ('std_min', 'green'),
        ('med_min', 'purple'),
        ('high_med', 'orange'),
        ('high_min', 'maroon'),
    )

    m = folium.Map(location=[poi.lat.mean(), poi.lon.mean()], zoom_start=12, tiles='OpenStreetMap')

    for _, row in poi.iterrows():
        # Rows with no flag set get colour None and no fill; they are still
        # added to the map.
        cluster_colour = next(
            (colour for flag, colour in flag_colours if row[flag]),
            None,
        )
        folium.CircleMarker(
            location=[row['lat'], row['lon']],
            radius=5,
            popup=row['zipcode'],
            color=cluster_colour,
            fill=cluster_colour is not None,
            fill_color=cluster_colour,
        ).add_to(m)

    # Station markers are added first and the circles afterwards, in two
    # passes, so all markers precede all circles on the map.
    for _, row in stations.iterrows():
        # NOTE(review): color / fill / fill_color look like path options and
        # may have no visual effect on a Marker - confirm before relying on them.
        folium.Marker(
            location=[row['lat'], row['lon']],
            popup=row['stationNameCityState'],
            color='blue',
            fill=True,
            fill_color='blue',
        ).add_to(m)
    for _, row in stations.iterrows():
        # No radius is passed, so folium's default circle radius is used.
        folium.Circle(
            location=[row['lat'], row['lon']],
            popup=row['stationNameCityState'],
            color='black',
            fill=False,
        ).add_to(m)
    return m
# Map of fast charging stations and the restaurant locations flagged as
# outside the range of the nearest charging station. The returned map is not
# stored; it is only displayed when this is the last expression of a notebook cell.
create_map(poi=restaurant_zipcode_latlon, stations=chargingstation_dc_loc)
# Same map for the combined restaurant and hotel locations.
create_map(poi=tourism_zipcode_latlon, stations=chargingstation_dc_loc)
# Map of level 2 and fast charging stations and the hotel locations.
create_map(poi=hotel_zipcode_latlon, stations=chargingstation__lv2dc_filtered_loc)